library(data.table)
library(ggplot2)
library(fixest)
## fixest 0.9.0, BREAKING changes! (Permanently remove this message with fixest_startup_msg(FALSE).) 
## - In i():
##     + the first two arguments have been swapped! Now it's i(factor_var, continuous_var) for interactions. 
##     + argument 'drop' has been removed (put everything in 'ref' now).
## - In feglm(): 
##     + the default family becomes 'gaussian' to be in line with glm(). Hence, for Poisson estimations, please use fepois() instead.
library(ggfortify)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(stringr)
# Read the two CSV inputs from the project directory and convert each to a
# data.table: one table of survey subjects, one of their family members.
data_dir <- "/Users/hsiao/Desktop/Projects/HRS_Chao/LTC_TSLA"
subject.dt <- as.data.table(
  read.csv(file.path(data_dir, "main_subject_0820.csv"))
)
fmlymmb.dt <- as.data.table(
  read.csv(file.path(data_dir, "family_member_1005.csv"))
)
# What happened with the augmented spouses:
# Chou miscoded them: she used the qser_no for all subjects across the multiple observed years.
# After the code was changed, 405 problematic records remain; 326 of them are NAs and can therefore be omitted.
# The NAs are mainly "other relationship" records from 1996, which should have been omitted earlier.
# The remaining 79 consist mainly of cohabiting partners (同居老伴); the 1993 and 1996 (old) waves did not ask about cohabitation (沒有問同居).
# 
# spou<- fmlymmb.dt[relationship ==2 & ObsPartnerY ==0]
# sum(is.na(spou$marstat)) # 326
# spou<- spou[! is.na(marstat)]
# who <- fmlymmb.dt[ ObsPartnerY ==0 & relationship ==2 & is.na(marstat)]
# table(who)
# Duplicated people
# Main change:
# detected_1 was added to detect whether a family member who exists only in a specific year is the only one;
# otherwise he will go through k_grouping.
# unif stands for the unified code

# test<- copy(fmlymmb.dt)
# sum(duplicated(test$unif))

Family Member Personalities

Preprocess

# Recode raw survey codes on the family-member table into 0/1 indicator
# columns, all in one by-reference update. Each RHS only reads raw columns,
# so evaluating them together gives the same values as one-by-one assignment.
fmlymmb.dt[, `:=`(
  # gender 0 = females
  gender = ifelse(gender == 1, 1, 0),
  # relationship code 2 marks the subject's partner
  partner = ifelse(relationship == 2, 1, 0),
  working = ifelse(work == 1, 1, 0),
  # marstat: age above 15 & unmarried -> 6, if under 15 -> NA
  unmarried = ifelse(marstat == 6, 1, 0),
  attend_at_least_uni = ifelse(edu >= 13, 1, 0),
  attend_at_least_high_school = ifelse(edu >= 10, 1, 0),
  livTogether = ifelse(livstat == 0, 1, 0),
  # 1 when the recorded ADL / IADL helper code equals this member's
  # relationship code
  primaryADL = ifelse(adl_who_help == relationship, 1, 0),
  primaryIADL = ifelse(iadl_who_help == relationship, 1, 0),
  # 1 whenever any ADL / IADL helper code is recorded on the row
  fmNeedADL = ifelse(!is.na(adl_who_help), 1, 0),
  fmNeedIADL = ifelse(!is.na(iadl_who_help), 1, 0),
  # stored as character so it enters regressions as a categorical variable
  subject_residency = as.character(subject_residency)
)]
# Define degree of intimacy
# Key each row by survey year + subject id so that one subject observed in
# several waves gets one key per wave.
subject.dt[, y_qser_no := paste0(survey_year, qser_no)]
fmlymmb.dt[, y_qser_no := paste0(survey_year, qser_no)]

# Subject-wave keys whose subject has marstat == 0.
non_married.obs <- subject.dt[marstat == 0, y_qser_no]
fmlymmb.dt.nmar <- fmlymmb.dt[y_qser_no %in% non_married.obs]
# ObsPartnerY is 0 for family members of those subject-waves and 1 otherwise.
fmlymmb.dt[, ObsPartnerY := ifelse(y_qser_no %in% non_married.obs, 0, 1)]

# fmlymmb.dt<- fmlymmb.dt[! (ObsPartnerY ==0 & relationship ==2 & is.na(marstat))]

Merging with Subject Data

# Recode the subject's sex to a 0/1 indicator (1 when the raw code is 1).
subject.dt[, rsex := ifelse(rsex == 1, 1, 0)]
# Left-join the subject's sex and age onto every family-member row, matching
# on subject id and survey year; members without a matching subject row are
# kept with NA in rsex / rage.
fmlymmb.dt <- merge(
  fmlymmb.dt,
  subject.dt[, .(qser_no, rsex, survey_year, rage)],
  by = c("qser_no", "survey_year"),
  all.x = TRUE
)

Summary

# Split family members by whether an ADL helper is recorded for the row
# (fmNeedADL), and pull out the members flagged as primary ADL caregiver.
fm.with.need <- fmlymmb.dt[fmNeedADL == 1]
fm.whot.need <- fmlymmb.dt[fmNeedADL == 0]
fm.primary.c <- fmlymmb.dt[primaryADL == 1]

# Report the size of each group, in the same order as before.
group_sizes <- c(
  "People in families with need:" = nrow(fm.with.need),
  "Number of Primary Caregiver:" = nrow(fm.primary.c),
  "People in families without need:" = nrow(fm.whot.need)
)
for (label in names(group_sizes)) {
  print(paste(label, group_sizes[[label]]))
}
## [1] "People in families with need: 8838"
## [1] "Number of Primary Caregiver: 997"
## [1] "People in families without need: 106317"

Age, Gender, Education, Marriage Status

# One-row summary (NA-excluding column means) of a family-member table.
# Because the inputs are 0/1 indicators, SEX, PARTNER, WORK, UNMARRIED,
# LVTOGETER and SUBSEX are shares; AGE, EDU and SUBAGE are plain means.
# Column names (including the spelling "LVTOGETER") are kept as they were so
# the printed table is unchanged.
member_profile <- function(dt) {
  dt[, .(
    AGE = mean(age, na.rm = TRUE),
    EDU = mean(edu, na.rm = TRUE),
    SEX = mean(gender, na.rm = TRUE),
    PARTNER = mean(partner, na.rm = TRUE),
    WORK = mean(working, na.rm = TRUE),
    UNMARRIED = mean(unmarried, na.rm = TRUE),
    LVTOGETER = mean(livTogether, na.rm = TRUE),
    SUBSEX = mean(rsex, na.rm = TRUE),
    SUBAGE = mean(rage, na.rm = TRUE)
  )]
}

# Rows, in order: with need, without need, primary caregivers; within each
# group ObsPartnerY == 1 comes first, then ObsPartnerY == 0.
rbind(
  member_profile(fm.with.need[ObsPartnerY == 1]),
  member_profile(fm.with.need[ObsPartnerY == 0]),
  member_profile(fm.whot.need[ObsPartnerY == 1]),
  member_profile(fm.whot.need[ObsPartnerY == 0]),
  member_profile(fm.primary.c[ObsPartnerY == 1]),
  member_profile(fm.primary.c[ObsPartnerY == 0])
)
##         AGE      EDU       SEX     PARTNER      WORK  UNMARRIED LVTOGETER
## 1: 44.86280 8.867423 0.4691261 0.150887574 0.5494908 0.14773680 0.4866838
## 2: 46.98345 9.011438 0.4631104 0.001857749 0.5700219 0.12334554 0.4167553
## 3: 40.01084 9.916187 0.4649134 0.170294387 0.5924159 0.20870854 0.5226831
## 4: 40.50267 9.666419 0.4777033 0.003217158 0.6164537 0.16943892 0.4348839
## 5: 57.21417 5.964646 0.2811448 0.602693603 0.3384354 0.08080808 0.9528620
## 6: 46.44000 8.535980 0.2431762 0.004962779 0.5298507 0.14214464 0.9131514
##       SUBSEX   SUBAGE
## 1: 0.5461538 74.68426
## 2: 0.2324841 80.72853
## 3: 0.5724889 67.36235
## 4: 0.3024129 73.17677
## 5: 0.5690236 74.40587
## 6: 0.2183623 81.41687

Regression

Families with need

# Linear probability models of being the primary ADL caregiver, estimated on
# family members with a recorded ADL need. Every model absorbs survey_year
# fixed effects; covariates are added one at a time from need.reg1 onward.
# `age^2` is the squared-age term (fixest reports it as "age square"); the
# original spelling `age**2` is parsed by R as the same `^` operator.

# 1 when the family member and the subject have the same sex indicator.
fm.with.need$SubSameSex <- ifelse(fm.with.need$gender == fm.with.need$rsex, 1, 0)
need.reg1 <- feols(primaryADL ~ gender | survey_year, fm.with.need)
## NOTE: 1 observation removed because of NA values (RHS: 1).
need.reg2 <- feols(primaryADL ~ gender + age + age^2 | survey_year, fm.with.need)
## NOTE: 42 observations removed because of NA values (RHS: 42).
need.reg3 <- feols(primaryADL ~ gender + age + age^2 + edu | survey_year, fm.with.need)
## NOTE: 275 observations removed because of NA values (RHS: 275).
need.reg4 <- feols(primaryADL ~ gender + age + age^2 + edu + working | survey_year, fm.with.need)
## NOTE: 313 observations removed because of NA values (RHS: 313).
need.reg5 <- feols(primaryADL ~ gender + age + age^2 + edu + working + unmarried | survey_year, fm.with.need)
## NOTE: 590 observations removed because of NA values (RHS: 590).
need.reg6 <- feols(primaryADL ~ gender + age + age^2 + edu + working + unmarried + SubSameSex | survey_year, fm.with.need)
## NOTE: 590 observations removed because of NA values (RHS: 590).
need.reg7 <- feols(primaryADL ~ gender + age + age^2 + edu + working + unmarried + SubSameSex + partner | survey_year, fm.with.need)
## NOTE: 590 observations removed because of NA values (RHS: 590).
need.reg8 <- feols(primaryADL ~ gender + age + age^2 + edu + working + unmarried + SubSameSex + partner + livTogether | survey_year, fm.with.need)
## NOTE: 596 observations removed because of NA values (RHS: 596).
need.reg9 <- feols(primaryADL ~ gender + age + age^2 + edu + working + unmarried + SubSameSex + partner + livTogether + subject_in_nursing_house | survey_year, fm.with.need)
## NOTE: 596 observations removed because of NA values (RHS: 596).
# need.reg10 repeats the need.reg8 specification on the subsample where
# subject_in_nursing_house == 0.
need.reg10 <- feols(primaryADL ~ gender + age + age^2 + edu + working + unmarried + SubSameSex + partner + livTogether | survey_year, fm.with.need[subject_in_nursing_house == 0])
## NOTE: 590 observations removed because of NA values (RHS: 590).
# Fix: the table previously listed need.reg7 twice and left out need.reg9.
# The console output recorded below this call predates the fix, so it still
# shows the duplicated need.reg7 column and no need.reg9 column.
etable(need.reg1, need.reg2, need.reg3, need.reg4, need.reg5, need.reg6, need.reg7, need.reg8, need.reg9, need.reg10)
##                           need.reg1           need.reg2           need.reg3
## Dependent Var.:          primaryADL          primaryADL          primaryADL
##                                                                            
## gender          -0.0910*** (0.0087) -0.0926*** (0.0099) -0.0862*** (0.0095)
## age                                    -0.0010 (0.0006)    -0.0010 (0.0011)
## age square                          4.36e-5** (7.04e-6) 3.56e-5** (5.35e-6)
## edu                                                      -0.0059** (0.0009)
## working                                                                    
## unmarried                                                                  
## SubSameSex                                                                 
## partner                                                                    
## livTogether                                                                
## Fixed-Effects:  ------------------- ------------------- -------------------
## survey_year                     Yes                 Yes                 Yes
## _______________ ___________________ ___________________ ___________________
## S.E.: Clustered     by: survey_year     by: survey_year     by: survey_year
## Observations                  8,837               8,796               8,563
## R2                          0.02120             0.04737             0.05087
## Within R2                   0.02061             0.04683             0.05042
##                          need.reg4          need.reg5          need.reg6
## Dependent Var.:         primaryADL         primaryADL         primaryADL
##                                                                         
## gender          -0.0851** (0.0148) -0.0851** (0.0128) -0.0869** (0.0144)
## age               -0.0010 (0.0019)   -0.0050 (0.0042)   -0.0049 (0.0041)
## age square      3.48e-5* (1.19e-5) 6.86e-5. (2.99e-5) 6.69e-5. (2.91e-5)
## edu             -0.0058** (0.0011)  -0.0074* (0.0017)  -0.0075* (0.0016)
## working           -0.0026 (0.0214)   -0.0030 (0.0196)   -0.0030 (0.0195)
## unmarried                             0.0124 (0.0139)    0.0135 (0.0140)
## SubSameSex                                              -0.0094 (0.0089)
## partner                                                                 
## livTogether                                                             
## Fixed-Effects:  ------------------ ------------------ ------------------
## survey_year                    Yes                Yes                Yes
## _______________ __________________ __________________ __________________
## S.E.: Clustered    by: survey_year    by: survey_year    by: survey_year
## Observations                 8,525              8,248              8,248
## R2                         0.04969            0.05171            0.05191
## Within R2                  0.04934            0.05137            0.05157
##                           need.reg7           need.reg7           need.reg8
## Dependent Var.:          primaryADL          primaryADL          primaryADL
##                                                                            
## gender           -0.0699** (0.0115)  -0.0699** (0.0115)  -0.0755** (0.0093)
## age                 0.0070 (0.0045)     0.0070 (0.0045)    0.0158* (0.0039)
## age square      -9.22e-5. (3.83e-5) -9.22e-5. (3.83e-5) -0.0002** (3.45e-5)
## edu               -0.0041. (0.0018)   -0.0041. (0.0018)    -0.0028 (0.0016)
## working             0.0055 (0.0135)     0.0055 (0.0135)     0.0004 (0.0100)
## unmarried           0.0223 (0.0110)     0.0223 (0.0110)   -0.0264. (0.0115)
## SubSameSex         0.0481* (0.0115)    0.0481* (0.0115)    0.0450* (0.0114)
## partner          0.4775*** (0.0446)  0.4775*** (0.0446)   0.3356** (0.0458)
## livTogether                                              0.2324*** (0.0125)
## Fixed-Effects:  ------------------- ------------------- -------------------
## survey_year                     Yes                 Yes                 Yes
## _______________ ___________________ ___________________ ___________________
## S.E.: Clustered     by: survey_year     by: survey_year     by: survey_year
## Observations                  8,248               8,248               8,242
## R2                          0.14906             0.14906             0.24413
## Within R2                   0.14875             0.14875             0.24385
##                          need.reg10
## Dependent Var.:          primaryADL
##                                    
## gender           -0.0831** (0.0116)
## age                0.0165* (0.0040)
## age square      -0.0002** (3.49e-5)
## edu                -0.0033 (0.0018)
## working           -1.38e-5 (0.0106)
## unmarried          -0.0208 (0.0129)
## SubSameSex         0.0487* (0.0110)
## partner           0.3646** (0.0424)
## livTogether      0.2289*** (0.0125)
## Fixed-Effects:  -------------------
## survey_year                     Yes
## _______________ ___________________
## S.E.: Clustered     by: survey_year
## Observations                  7,607
## R2                          0.24881
## Within R2                   0.24851

Decision Tree

library(rpart)
library(rpart.plot)

# Regression trees for primaryADL on family members with a recorded ADL need.
#
# The formulas previously contained `age**2`. In a model formula `^` (which
# `**` is parsed as) means term crossing, so `age^2` collapses to `age` and
# added no squared term; it is dropped here, leaving the fitted trees
# unchanged. (A tree split on age is invariant to squaring a non-negative
# variable anyway.)

# Draw a fitted rpart tree with the plotting options shared by every tree
# in this section.
plot_caregiver_tree <- function(tree) {
  invisible(rpart.plot(tree,
                       faclen = 0,
                       box.palette = "RdBu",
                       fallen.leaves = TRUE,
                       shadow.col = "gray",
                       nn = TRUE))
}

# All covariates, including partner and livTogether.
simple.tree <- rpart(primaryADL ~ gender + age +
                       edu + partner + working + SubSameSex +
                       unmarried + livTogether, data = fm.with.need, cp = .008)
plot_caregiver_tree(simple.tree)

# Without livTogether (lower complexity threshold).
simple.tree <- rpart(primaryADL ~ gender + age +
                       edu + partner + working + SubSameSex +
                       unmarried, data = fm.with.need, cp = .003)
plot_caregiver_tree(simple.tree)

# Without livTogether and partner.
simple.tree <- rpart(primaryADL ~ gender + age +
                       edu + working + SubSameSex +
                       unmarried, data = fm.with.need, cp = .003)
plot_caregiver_tree(simple.tree)

# which spouse would be the main giver
## omitted some very important variables : partner income, partner health status
simple.tree <- rpart(primaryADL ~ gender + age +
                       edu + working + SubSameSex +
                       unmarried, data = fm.with.need[partner == 1], cp = .003)
plot_caregiver_tree(simple.tree)

PCA Clustering

# Principal component analysis of family-member characteristics.
# data.pca keeps the seven PCA inputs plus primaryADL, which is used only to
# colour the score plot later and is converted to a factor for that purpose.
data.pca <- fm.with.need[, c("gender", "age", "edu", "working", "unmarried", "SubSameSex", "partner", "primaryADL")]
data.pca$primaryADL <- factor(data.pca$primaryADL)
# Fit on the first seven columns only (primaryADL excluded).
# `scale.` is spelled out in full: the earlier `scale = TRUE` only reached
# prcomp's `scale.` argument through partial argument matching.
# NOTE(review): with the default na.action the formula interface drops rows
# containing NA, so simple.pca$x can have fewer rows than data.pca.
simple.pca <- prcomp(formula = ~ .,
                     data = data.pca[, 1:7],
                     scale. = TRUE)
# livTogether is left out of the PCA inputs
# Scree plot of component variances, with a reference line at variance 1.
plot(simple.pca, type = "line")
abline(h = 1, col = "blue")

# Variable loadings on the first three principal components.
simple.pca$rotation[, 1:3]
##                   PC1        PC2          PC3
## gender     -0.1387858  0.6613955 -0.057681467
## age         0.5284127  0.1662838  0.229305578
## edu        -0.4666603  0.1445338  0.004934944
## working    -0.3486703  0.4194933  0.401354192
## unmarried  -0.3174198 -0.1157329 -0.737809873
## SubSameSex -0.1833693 -0.5437932  0.367102949
## partner     0.4772798  0.1701055 -0.322264293
# Loadings of each input variable on the first three principal components.
first.pca <- simple.pca$rotation[, 1]   # first principal component
second.pca <- simple.pca$rotation[, 2]  # second principal component
third.pca <- simple.pca$rotation[, 3]   # third principal component

# Dot chart of one component's loadings, sorted in increasing order.
plot_loadings <- function(loadings, title) {
  dotchart(loadings[order(loadings, decreasing = FALSE)],  # sorted coefficients
           main = title,                                   # main title
           xlab = "Variable Loadings",                     # x-axis title
           col = "black")
}

plot_loadings(first.pca, "Loading Plot for PC1")

plot_loadings(second.pca, "Loading Plot for PC2")

plot_loadings(third.pca, "Loading Plot for PC3")

library(devtools)
## 載入需要的套件:usethis
# Install ggfortify from GitHub only when it is not already available, so
# that running the analysis does not hit the network on every run.
if (!requireNamespace("ggfortify", quietly = TRUE)) {
  install_github('sinhrks/ggfortify')
}
## (recorded from the earlier unconditional install call:)
## Skipping install of 'ggfortify' from a github remote, the SHA1 (195b1fb1) has not changed since last install.
##   Use `force = TRUE` to force installation
library(ggfortify); library(ggplot2)

# Score plot on the first two components, coloured by primaryADL.
# The PCA was fitted through the formula interface, which (with the default
# na.action) drops rows that have NA in any input, so the scores can have
# fewer rows than data.pca. Passing the full table made autoplot recycle the
# scores against it (the "8248 rows but longest item has 8838" warning
# recorded after this call), which misaligns the colours. Only the rows with
# complete PCA inputs are passed now.
pca_vars <- rownames(simple.pca$rotation)
pca_rows <- complete.cases(data.pca[, pca_vars, with = FALSE])
autoplot(simple.pca, data = data.pca[pca_rows], colour = 'primaryADL', alpha = 0.1,
         loadings = TRUE, loadings.colour = 'gray',
         loadings.label = TRUE, loadings.label.alpha = 0.4,
         loadings.label.colour = "black")
## Warning in as.data.table.list(x, keep.rownames = keep.rownames, check.names =
## check.names, : Item 2 has 8248 rows but longest item has 8838; recycled with
## remainder.

Excluding Married Observations

Summary

Clean Those with Partners

# Keep family members of subject-waves flagged ObsPartnerY == 0.
fmlymmb.dt.nmar <- fmlymmb.dt[ObsPartnerY == 0]
# Subjects who nevertheless list a member with relationship code 2:
# cohabiting but not married (72).
odd.obs <- fmlymmb.dt.nmar[relationship == 2, unique(qser_no)]
# Drop every row of those subjects; the match is on qser_no, so all of a
# subject's survey years are removed.
fmlymmb.dt.nmar <- fmlymmb.dt.nmar[!(qser_no %in% odd.obs)]

# Frequency of each remaining relationship code.
table(fmlymmb.dt.nmar[["relationship"]])
## 
##    3    9   10   11   12   13   14   15   16   17   18   19   21   22   23   24 
##    3    1    1   37  102    7   22    1    4    2    2    2   82   25    6    4 
##   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42 
##   42    3   12  158 4019 2967 1604  771  398   12  162    2   52   95 3691 2438 
##   43   44   45   46   47   49   50   51   52   53   54   59   61   62   70   72 
## 1383  714  386   13  248   55    1 1427 1073  245   78  161  351    9   38   56 
##   73   76   77   78   79   81   91   92   93   94   95   96   97 
##   99    1   17    1   78    2 1185  655  375  143   79    1    2
# fmlymmb.dt.nmar<- fmlymmb.dt.nmar[ relationship %in% c()]
# Rebuild the three analysis groups on the partner-free sample; these
# overwrite the full-sample objects of the same names.
fm.with.need <- fmlymmb.dt.nmar[fmNeedADL == 1]
fm.whot.need <- fmlymmb.dt.nmar[fmNeedADL == 0]
fm.primary.c <- fmlymmb.dt.nmar[primaryADL == 1]

# Report the size of each group, in the same order as before.
group_sizes <- c(
  "People in families with need:" = nrow(fm.with.need),
  "Number of Primary Caregiver:" = nrow(fm.primary.c),
  "People in families without need:" = nrow(fm.whot.need)
)
for (label in names(group_sizes)) {
  print(paste(label, group_sizes[[label]]))
}
## [1] "People in families with need: 3713"
## [1] "Number of Primary Caregiver: 400"
## [1] "People in families without need: 21890"

Age, Gender, Education, Marriage Status

# One-row summary (NA-excluding column means) of a family-member table,
# including the mean of subject_in_nursing_house. Column names (including
# the spelling "LVTOGETER") are kept as they were so the printed table is
# unchanged.
member_profile_nursing <- function(dt) {
  dt[, .(
    AGE = mean(age, na.rm = TRUE),
    EDU = mean(edu, na.rm = TRUE),
    SEX = mean(gender, na.rm = TRUE),
    PARTNER = mean(partner, na.rm = TRUE),
    WORK = mean(working, na.rm = TRUE),
    UNMARRIED = mean(unmarried, na.rm = TRUE),
    LVTOGETER = mean(livTogether, na.rm = TRUE),
    SUBSEX = mean(rsex, na.rm = TRUE),
    SUBAGE = mean(rage, na.rm = TRUE),
    Sub_in_nursing = mean(subject_in_nursing_house, na.rm = TRUE)
  )]
}

# Rows, in order: with need, without need, primary caregivers.
rbind(
  member_profile_nursing(fm.with.need),
  member_profile_nursing(fm.whot.need),
  member_profile_nursing(fm.primary.c)
)
##         AGE      EDU       SEX PARTNER      WORK UNMARRIED LVTOGETER    SUBSEX
## 1: 47.00108 8.996959 0.4651225       0 0.5703971 0.1215675 0.4197031 0.2267708
## 2: 40.50402 9.669533 0.4783463       0 0.6183188 0.1693116 0.4349837 0.2957515
## 3: 46.36524 8.570000 0.2450000       0 0.5288221 0.1407035 0.9125000 0.2125000
##      SUBAGE Sub_in_nursing
## 1: 80.82758    0.105844331
## 2: 73.28595    0.002832344
## 3: 81.40000    0.010000000

Regression

Family with Need (Observations Not Having Partners)

# Same sequence of linear probability models as for the full sample, now on
# the partner-free sample, so `partner` is no longer a covariate. Every model
# absorbs survey_year fixed effects. `age^2` is what R parses `age**2` into.

# 1 when the family member and the subject have the same sex indicator.
fm.with.need[, SubSameSex := ifelse(gender == rsex, 1, 0)]
need.reg1 <- feols(primaryADL ~ gender | survey_year, data = fm.with.need)
need.reg2 <- feols(
  primaryADL ~ gender + age + age^2 | survey_year,
  data = fm.with.need
)
## NOTE: 22 observations removed because of NA values (RHS: 22).
need.reg3 <- feols(
  primaryADL ~ gender + age + age^2 + edu | survey_year,
  data = fm.with.need
)
## NOTE: 118 observations removed because of NA values (RHS: 118).
need.reg4 <- feols(
  primaryADL ~ gender + age + age^2 + edu + working | survey_year,
  data = fm.with.need
)
## NOTE: 133 observations removed because of NA values (RHS: 133).
need.reg5 <- feols(
  primaryADL ~ gender + age + age^2 + edu + working + unmarried | survey_year,
  data = fm.with.need
)
## NOTE: 252 observations removed because of NA values (RHS: 252).
need.reg6 <- feols(
  primaryADL ~ gender + age + age^2 + edu + working + unmarried +
    SubSameSex | survey_year,
  data = fm.with.need
)
## NOTE: 252 observations removed because of NA values (RHS: 252).
need.reg7 <- feols(
  primaryADL ~ gender + age + age^2 + edu + working + unmarried +
    SubSameSex + livTogether | survey_year,
  data = fm.with.need
)
## NOTE: 257 observations removed because of NA values (RHS: 257).
# NOTE(review): need.reg8 has exactly the same specification as need.reg7,
# so the two columns of the table are identical — confirm whether a
# different covariate was intended here.
need.reg8 <- feols(
  primaryADL ~ gender + age + age^2 + edu + working + unmarried +
    SubSameSex + livTogether | survey_year,
  data = fm.with.need
)
## NOTE: 257 observations removed because of NA values (RHS: 257).
# need.reg9 adds the nursing-house indicator and subject_residency.
need.reg9 <- feols(
  primaryADL ~ gender + age + age^2 + edu + working + unmarried +
    SubSameSex + livTogether + subject_in_nursing_house +
    subject_residency | survey_year,
  data = fm.with.need
)
## NOTE: 257 observations removed because of NA values (RHS: 257).
# need.reg10 / need.reg11 replace edu with a schooling-threshold indicator.
need.reg10 <- feols(
  primaryADL ~ gender + age + age^2 + attend_at_least_high_school + working +
    unmarried + SubSameSex + livTogether + subject_in_nursing_house +
    subject_residency | survey_year,
  data = fm.with.need
)
## NOTE: 257 observations removed because of NA values (RHS: 257).
need.reg11 <- feols(
  primaryADL ~ gender + age + age^2 + attend_at_least_uni + working +
    unmarried + SubSameSex + livTogether + subject_in_nursing_house +
    subject_residency | survey_year,
  data = fm.with.need
)
## NOTE: 257 observations removed because of NA values (RHS: 257).
etable(
  need.reg1, need.reg2, need.reg3, need.reg4, need.reg5, need.reg6,
  need.reg7, need.reg8, need.reg9, need.reg10, need.reg11
)
##                                      need.reg1            need.reg2
## Dependent Var.:                     primaryADL           primaryADL
##                                                                    
## gender                      -0.0954** (0.0142)   -0.0932** (0.0139)
## age                                               0.0056** (0.0009)
## age square                                     -7.39e-5** (1.42e-5)
## edu                                                                
## working                                                            
## unmarried                                                          
## SubSameSex                                                         
## livTogether                                                        
## subject_in_nursing_house                                           
## subject_residency10                                                
## subject_residency11                                                
## subject_residency12                                                
## subject_residency13                                                
## subject_residency14                                                
## subject_residency15                                                
## subject_residency16                                                
## subject_residency17                                                
## subject_residency18                                                
## subject_residency19                                                
## subject_residency2                                                 
## subject_residency20                                                
## subject_residency21                                                
## subject_residency22                                                
## subject_residency3                                                 
## subject_residency4                                                 
## subject_residency5                                                 
## subject_residency6                                                 
## subject_residency7                                                 
## subject_residency8                                                 
## subject_residency9                                                 
## attend_at_least_high_school                                        
## attend_at_least_uni                                                
## Fixed-Effects:              ------------------ --------------------
## survey_year                                Yes                  Yes
## ___________________________ __________________ ____________________
## S.E.: Clustered                by: survey_year      by: survey_year
## Observations                             3,713                3,691
## R2                                     0.02374              0.03123
## Within R2                              0.02353              0.03106
##                                        need.reg3           need.reg4
## Dependent Var.:                       primaryADL          primaryADL
##                                                                     
## gender                        -0.0896** (0.0139)  -0.0891** (0.0176)
## age                             0.0055* (0.0014)    0.0055* (0.0019)
## age square                  -7.78e-5** (1.33e-5) -7.78e-5* (1.71e-5)
## edu                           -0.0037** (0.0005)  -0.0036** (0.0005)
## working                                             -0.0002 (0.0135)
## unmarried                                                           
## SubSameSex                                                          
## livTogether                                                         
## subject_in_nursing_house                                            
## subject_residency10                                                 
## subject_residency11                                                 
## subject_residency12                                                 
## subject_residency13                                                 
## subject_residency14                                                 
## subject_residency15                                                 
## subject_residency16                                                 
## subject_residency17                                                 
## subject_residency18                                                 
## subject_residency19                                                 
## subject_residency2                                                  
## subject_residency20                                                 
## subject_residency21                                                 
## subject_residency22                                                 
## subject_residency3                                                  
## subject_residency4                                                  
## subject_residency5                                                  
## subject_residency6                                                  
## subject_residency7                                                  
## subject_residency8                                                  
## subject_residency9                                                  
## attend_at_least_high_school                                         
## attend_at_least_uni                                                 
## Fixed-Effects:              -------------------- -------------------
## survey_year                                  Yes                 Yes
## ___________________________ ____________________ ___________________
## S.E.: Clustered                  by: survey_year     by: survey_year
## Observations                               3,595               3,580
## R2                                       0.03121             0.03102
## Within R2                                0.03109             0.03086
##                                      need.reg5          need.reg6
## Dependent Var.:                     primaryADL         primaryADL
##                                                                  
## gender                      -0.0859** (0.0165)  -0.0493. (0.0226)
## age                           -0.0003 (0.0045)    0.0001 (0.0044)
## age square                  -2.68e-5 (3.76e-5) -3.22e-5 (3.68e-5)
## edu                         -0.0049** (0.0008) -0.0048** (0.0008)
## working                       -0.0019 (0.0123)   -0.0030 (0.0121)
## unmarried                     -0.0039 (0.0112)   -0.0085 (0.0133)
## SubSameSex                                       0.0664* (0.0199)
## livTogether                                                      
## subject_in_nursing_house                                         
## subject_residency10                                              
## subject_residency11                                              
## subject_residency12                                              
## subject_residency13                                              
## subject_residency14                                              
## subject_residency15                                              
## subject_residency16                                              
## subject_residency17                                              
## subject_residency18                                              
## subject_residency19                                              
## subject_residency2                                               
## subject_residency20                                              
## subject_residency21                                              
## subject_residency22                                              
## subject_residency3                                               
## subject_residency4                                               
## subject_residency5                                               
## subject_residency6                                               
## subject_residency7                                               
## subject_residency8                                               
## subject_residency9                                               
## attend_at_least_high_school                                      
## attend_at_least_uni                                              
## Fixed-Effects:              ------------------ ------------------
## survey_year                                Yes                Yes
## ___________________________ __________________ __________________
## S.E.: Clustered                by: survey_year    by: survey_year
## Observations                             3,461              3,461
## R2                                     0.03310            0.04071
## Within R2                              0.03301            0.04062
##                                      need.reg7          need.reg8
## Dependent Var.:                     primaryADL         primaryADL
##                                                                  
## gender                       -0.0561* (0.0192)  -0.0561* (0.0192)
## age                           0.0153* (0.0045)   0.0153* (0.0045)
## age square                  -0.0002* (3.95e-5) -0.0002* (3.95e-5)
## edu                          -0.0035. (0.0015)  -0.0035. (0.0015)
## working                       -0.0136 (0.0100)   -0.0136 (0.0100)
## unmarried                    -0.0473* (0.0132)  -0.0473* (0.0132)
## SubSameSex                    0.0594* (0.0209)   0.0594* (0.0209)
## livTogether                 0.2827*** (0.0021) 0.2827*** (0.0021)
## subject_in_nursing_house                                         
## subject_residency10                                              
## subject_residency11                                              
## subject_residency12                                              
## subject_residency13                                              
## subject_residency14                                              
## subject_residency15                                              
## subject_residency16                                              
## subject_residency17                                              
## subject_residency18                                              
## subject_residency19                                              
## subject_residency2                                               
## subject_residency20                                              
## subject_residency21                                              
## subject_residency22                                              
## subject_residency3                                               
## subject_residency4                                               
## subject_residency5                                               
## subject_residency6                                               
## subject_residency7                                               
## subject_residency8                                               
## subject_residency9                                               
## attend_at_least_high_school                                      
## attend_at_least_uni                                              
## Fixed-Effects:              ------------------ ------------------
## survey_year                                Yes                Yes
## ___________________________ __________________ __________________
## S.E.: Clustered                by: survey_year    by: survey_year
## Observations                             3,456              3,456
## R2                                     0.19913            0.19913
## Within R2                              0.19906            0.19906
##                                      need.reg9         need.reg10
## Dependent Var.:                     primaryADL         primaryADL
##                                                                  
## gender                       -0.0555* (0.0178)  -0.0548* (0.0184)
## age                           0.0146* (0.0044)   0.0141* (0.0043)
## age square                  -0.0002* (3.65e-5) -0.0002* (3.62e-5)
## edu                          -0.0039. (0.0018)                   
## working                       -0.0115 (0.0096)   -0.0088 (0.0096)
## unmarried                    -0.0555* (0.0130)  -0.0524* (0.0126)
## SubSameSex                    0.0577* (0.0199)   0.0579* (0.0203)
## livTogether                 0.2791*** (0.0043) 0.2793*** (0.0040)
## subject_in_nursing_house      -0.0236 (0.0178)   -0.0225 (0.0184)
## subject_residency10          0.0653** (0.0079)  0.0723** (0.0093)
## subject_residency11           -0.0357 (0.0213)   -0.0331 (0.0225)
## subject_residency12          -0.0294. (0.0137)   -0.0307 (0.0147)
## subject_residency13            0.0071 (0.0378)    0.0072 (0.0406)
## subject_residency14            0.0248 (0.0127)    0.0246 (0.0125)
## subject_residency15           -0.0022 (0.0185)   7.11e-5 (0.0176)
## subject_residency16           -0.0069 (0.0253)   -0.0080 (0.0237)
## subject_residency17         0.0265*** (0.0029)  0.0233** (0.0041)
## subject_residency18           -0.0351 (0.0305)   -0.0372 (0.0304)
## subject_residency19          -0.0279* (0.0061) -0.0283** (0.0059)
## subject_residency2            -0.0360 (0.0178)   -0.0292 (0.0152)
## subject_residency20            0.0257 (0.0325)    0.0265 (0.0313)
## subject_residency21           -0.0399 (0.0291)   -0.0446 (0.0294)
## subject_residency22            0.0046 (0.0344)   -0.0005 (0.0341)
## subject_residency3             0.0019 (0.0228)    0.0014 (0.0218)
## subject_residency4            -0.0607 (0.0756)   -0.0674 (0.0730)
## subject_residency5             0.0262 (0.0277)    0.0265 (0.0288)
## subject_residency6             0.0640 (0.0512)    0.0678 (0.0502)
## subject_residency7             0.0102 (0.0126)    0.0085 (0.0133)
## subject_residency8            -0.0106 (0.0060)  -0.0126. (0.0050)
## subject_residency9            -0.0224 (0.0150)   -0.0208 (0.0140)
## attend_at_least_high_school                     -0.0559* (0.0152)
## attend_at_least_uni                                              
## Fixed-Effects:              ------------------ ------------------
## survey_year                                Yes                Yes
## ___________________________ __________________ __________________
## S.E.: Clustered                by: survey_year    by: survey_year
## Observations                             3,456              3,456
## R2                                     0.20424            0.20851
## Within R2                              0.20416            0.20843
##                                     need.reg11
## Dependent Var.:                     primaryADL
##                                               
## gender                       -0.0537* (0.0185)
## age                           0.0145* (0.0046)
## age square                  -0.0002* (4.02e-5)
## edu                                           
## working                       -0.0108 (0.0083)
## unmarried                    -0.0529* (0.0141)
## SubSameSex                    0.0586* (0.0198)
## livTogether                 0.2777*** (0.0048)
## subject_in_nursing_house      -0.0257 (0.0197)
## subject_residency10          0.0825** (0.0137)
## subject_residency11           -0.0356 (0.0218)
## subject_residency12          -0.0307. (0.0115)
## subject_residency13            0.0103 (0.0411)
## subject_residency14            0.0236 (0.0116)
## subject_residency15           -0.0011 (0.0178)
## subject_residency16           -0.0084 (0.0245)
## subject_residency17           0.0233* (0.0067)
## subject_residency18           -0.0355 (0.0295)
## subject_residency19          -0.0307* (0.0067)
## subject_residency2            -0.0307 (0.0177)
## subject_residency20            0.0264 (0.0374)
## subject_residency21           -0.0481 (0.0279)
## subject_residency22            0.0025 (0.0342)
## subject_residency3             0.0060 (0.0225)
## subject_residency4            -0.0597 (0.0723)
## subject_residency5             0.0215 (0.0287)
## subject_residency6             0.0664 (0.0522)
## subject_residency7             0.0109 (0.0131)
## subject_residency8           -0.0150* (0.0035)
## subject_residency9            -0.0295 (0.0177)
## attend_at_least_high_school                   
## attend_at_least_uni         -0.0692** (0.0091)
## Fixed-Effects:              ------------------
## survey_year                                Yes
## ___________________________ __________________
## S.E.: Clustered                by: survey_year
## Observations                             3,456
## R2                                     0.20828
## Within R2                              0.20820

Decision Tree

# Tree model for primaryADL on gender, age, education, work status, marital
# status and co-residence (livTogether); cp = .008 is rpart's complexity
# threshold for keeping a split.
# The former `age**2` term is removed: inside a formula `**` is parsed as `^`,
# the crossing operator, so `age**2` expanded to plain `age` and no squared
# term ever entered the model. The fitted tree is therefore unchanged.
simple.tree <- rpart(
  primaryADL ~ gender + age + edu + working + unmarried + livTogether,
  data = fm.with.need,
  cp = .008
)
# faclen = 0 prints full factor level names; nn = TRUE labels the node numbers.
rpart.plot(
  simple.tree,
  faclen = 0,
  box.palette = "RdBu",
  fallen.leaves = TRUE,
  shadow.col = "gray",
  nn = TRUE
)

# Tree model for primaryADL that swaps livTogether for SubSameSex and lowers
# the complexity threshold to cp = .003, which allows more splits.
# The former `age**2` term is removed: inside a formula `**` is parsed as `^`,
# the crossing operator, so `age**2` expanded to plain `age` and no squared
# term ever entered the model. The fitted tree is therefore unchanged.
simple.tree <- rpart(
  primaryADL ~ gender + age + edu + working + SubSameSex + unmarried,
  data = fm.with.need,
  cp = .003
)
# faclen = 0 prints full factor level names; nn = TRUE labels the node numbers.
rpart.plot(
  simple.tree,
  faclen = 0,
  box.palette = "RdBu",
  fallen.leaves = TRUE,
  shadow.col = "gray",
  nn = TRUE
)

# Tree model for primaryADL that replaces edu with the two attainment
# indicators (attend_at_least_uni, attend_at_least_high_school); cp = .003.
# The former `age**2` term is removed: inside a formula `**` is parsed as `^`,
# the crossing operator, so `age**2` expanded to plain `age` and no squared
# term ever entered the model. The fitted tree is therefore unchanged.
simple.tree <- rpart(
  primaryADL ~ gender + age +
    attend_at_least_uni + attend_at_least_high_school +
    working + SubSameSex + unmarried,
  data = fm.with.need,
  cp = .003
)
# faclen = 0 prints full factor level names; nn = TRUE labels the node numbers.
rpart.plot(
  simple.tree,
  faclen = 0,
  box.palette = "RdBu",
  fallen.leaves = TRUE,
  shadow.col = "gray",
  nn = TRUE
)

PCA

# Working copy for the PCA: six input variables plus primaryADL, which is
# kept only as a grouping label for later plots and is not part of the fit.
data.pca <- fm.with.need[, c("gender", "age", "edu", "working",
                             "unmarried", "SubSameSex", "primaryADL")]
data.pca$primaryADL <- factor(data.pca$primaryADL)

# PCA on the first six columns, standardised to unit variance.
# `scale.` is spelled out in full (the old `scale = TRUE` only worked through
# partial argument matching). The formula interface applies the session's
# na.action (na.omit unless changed), so rows with a missing input are left
# out of the fit and simple.pca$x can have fewer rows than data.pca.
simple.pca <- prcomp(formula = ~ .,
                     data = data.pca[, 1:6],
                     scale. = TRUE)

# Scree plot; the blue line at 1 marks the eigenvalue-one reference level.
plot(simple.pca, type = "lines")
abline(h = 1, col = "blue")

# Print the variable loadings on the first three principal components.
simple.pca$rotation[, c("PC1", "PC2", "PC3")]
##                   PC1        PC2         PC3
## gender     -0.4514577  0.4527224 -0.23430113
## age         0.3896267  0.5306502  0.02055758
## edu        -0.4706090 -0.1923960  0.34688539
## working    -0.4409099  0.1265690  0.59644448
## unmarried  -0.2862820 -0.4933359 -0.55438207
## SubSameSex  0.3828123 -0.4658718  0.40157862
# Loadings of the first three principal components.
first.pca  <- simple.pca$rotation[, 1]   # first principal component
second.pca <- simple.pca$rotation[, 2]   # second principal component
third.pca  <- simple.pca$rotation[, 3]   # third principal component

# One dot chart per component, variables ordered by ascending loading.
dotchart(sort(first.pca),
         col = "black",
         xlab = "Variable Loadings",        # x-axis title
         main = "Loading Plot for PC1")     # main title

dotchart(sort(second.pca),
         col = "black",
         xlab = "Variable Loadings",        # x-axis title
         main = "Loading Plot for PC2")     # main title

dotchart(sort(third.pca),
         col = "black",
         xlab = "Variable Loadings",        # x-axis title
         main = "Loading Plot for PC3")     # main title

# PCA biplot with loadings, points coloured by primaryADL.
# prcomp() left out the rows with a missing value in the six PCA inputs, so
# simple.pca$x has fewer rows than data.pca. Passing the full data.pca made
# autoplot() recycle the scores against unrelated rows ("recycled with
# remainder" warning), which misaligned the colours. Restrict the data to the
# rows that were actually fitted so scores and labels line up one-to-one.
pca.rows <- complete.cases(data.pca[, 1:6])
# Guard: fail loudly if the kept rows do not match the fitted scores.
stopifnot(sum(pca.rows) == nrow(simple.pca$x))
autoplot(
  simple.pca,
  data = data.pca[pca.rows, ],
  colour = "primaryADL",
  alpha = 0.1,
  loadings = TRUE,
  loadings.colour = "gray",
  loadings.label = TRUE,
  loadings.label.alpha = 0.4,
  loadings.label.colour = "black"
)
## Warning in as.data.table.list(x, keep.rownames = keep.rownames, check.names =
## check.names, : Item 2 has 3461 rows but longest item has 3713; recycled with
## remainder.